##GO and KEGG Enrichment function

##GO.Sim.PCA function

##GO20 Function

##KEGG20 Function

BP.GO.fig
CC.GO.fig
MF.GO.fig
KEGG.fig
heatmapPlot(simMatrix,
            reducedTerms,
            annotateParent=TRUE,
            annotationLabel="parentTerm",
            fontsize=6,
            main = "Repressor Gene Enriched Biological Process GO terms")

use.data   = simMatrix
GO.Sim.PCA(use.data)
treemapPlot(reducedTerms)

##GR.F vs WT.F specific volcano

specific.cat = c('none', 'parent.category', 'specific.kegg', 'specific.go')

specific.cat = specific.cat[1]
parent.category = 'neurogenesis'
specific.kegg = "DNA replication"
specific.go =  '32553'
Synaptic.genes = F

set.size = F
  
GOterm.genes = vector('list', length = length(unique(reducedTerms$parent)))
names(GOterm.genes) = unique(reducedTerms$parent)
i=1
while(i<=length(GOterm.genes)){
  goinparent = reducedTerms[grep(names(GOterm.genes)[i], reducedTerms$parent),'go']
  genesinparent = unlist(genesingo[goinparent])
  GOterm.genes[[i]] = GeneIDKey[GeneIDKey$ensembl %in% genesinparent, 'FBgn']
  i = i +1
}

volcano.data = data.frame(Symbol = GeneIDKey[row.names(TKT.EdgeR), "Symbol"], 
                          FDR = -log2(TKT.EdgeR[, 'GRxWT.FC']),
                          FC = TKT.EdgeR.FC[row.names(TKT.EdgeR), 'GRxWT.FC'],
                          ##Color = 0)
                          Color = 'grey',
                          size = 5*log(mean.cpm[row.names(TKT.EdgeR),"GR.F"]),
                          GR.F.cpm = mean.cpm[row.names(TKT.EdgeR),"GR.F"],
                          WT.F.cpm = mean.cpm[row.names(TKT.EdgeR),"WT.F"],
                          GO.terms = "none")

main.title = "Significant DEG in G85R Females vs. WT Females"

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC < 0] = 'lightblue'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC < 0] = 'steelblue'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC < 0] = 'dodgerblue'

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC > 0] = 'lightcoral'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC > 0] = 'tomato'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC > 0] = 'firebrick'

volcano.data$size[volcano.data$FDR <= -log2(.05)] = 3

if(set.size == T){
  volcano.data$size = 15
  volcano.data$size[volcano.data$FDR <= -log2(.05)] = 5
}

volcano.data$Color = 'grey'
volcano.data$Color[volcano.data$Symbol %in% Synaptic$Symbol] = 'deeppink'

if(specific.cat == 'parent.category'){
  parent.genes = GOterm.genes[[intersect(reducedTerms[reducedTerms$parentTerm == parent.category, "go"], names(GOterm.genes))]]
  volcano.data$Color = 'honeydew'
  volcano.data[parent.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$GO[c(grep(parent.category, sig.cats$GO$term), grep(parent.category, sig.cats$GO$category)), 'term']
}


if(specific.cat == 'specific.kegg'){
  kegg.id = sig.cats$KEGG[c(grep(specific.kegg, sig.cats$KEGG$Name), grep(specific.kegg, sig.cats$KEGG$category)), 'category']
  kegg.genes = names(kegg[grep(kegg.id, kegg)])
  volcano.data$Color = 'honeydew'
  row.names(volcano.data) = volcano.data$Symbol
    volcano.data[kegg.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$KEGG[c(grep(specific.kegg, sig.cats$KEGG$Name), grep(specific.kegg, sig.cats$KEGG$category)), 'Name']
}


if(specific.cat == 'specific.go'){
  go.id = sig.cats$GO[c(grep(specific.go, sig.cats$GO$term), grep(specific.go, sig.cats$GO$category)), 'category']
  volcano.data$Color = 'honeydew'
  go.genes = genesingo[[go.id]]
  go.genes = GeneIDKey[GeneIDKey$ensembl %in% go.genes, "FBgn"]
  go.genes = intersect(go.genes, row.names(volcano.data))
  volcano.data[go.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$GO[c(grep(specific.go, sig.cats$GO$term), grep(specific.go, sig.cats$GO$category)), 'term']
}

##volcano.data


fig = plot_ly(data = volcano.data,
              x = ~FC,
              y = ~FDR,
              type = 'scatter',
              mode = 'markers',
              marker = list(color = ~Color, colors = ~Color, size = volcano.data$size,
                            line = list(color = 'black', width = .5)),
              hoverinfo = "text",
              hovertext = paste("Gene:", volcano.data$Symbol,
                      "\n-log2(FDR): ", round(volcano.data$FDR,2),
                      "\nFC: ", round(volcano.data$FC,2),
                      "\nG85R mean cpm: ", round(volcano.data$GR.F.cpm, 1),
                      "\nWT mean cpm: ", round(volcano.data$WT.F.cpm, 1)))
fig = fig %>% layout(title = main.title)

              ##color = ~Color,
              ##colors = 'Spectral')
              ##color = ~Color,
              ##colors = ~Color)
              ##marker = list(colorscale = list(c(0,.5,1), c("blue","yellow", "red")), color = ~Color))

fig
specific.cat = c('none', 'parent.category', 'specific.kegg', 'specific.go')

specific.cat = specific.cat[1]
parent.category = 'chromosome organization'
specific.kegg = "DNA replication"
specific.go =  '006457'

set.size = F
  
GOterm.genes = vector('list', length = length(unique(reducedTerms$parent)))
names(GOterm.genes) = unique(reducedTerms$parent)
i=1
while(i<=length(GOterm.genes)){
  goinparent = reducedTerms[grep(names(GOterm.genes)[i], reducedTerms$parent),'go']
  genesinparent = unlist(genesingo[goinparent])
  GOterm.genes[[i]] = GeneIDKey[GeneIDKey$ensembl %in% genesinparent, 'FBgn']
  i = i +1
}

volcano.data = data.frame(Symbol = GeneIDKey[row.names(TKT.EdgeR), "Symbol"], 
                          FDR = -log2(TKT.EdgeR[, 'GRxWT.MC']),
                          FC = TKT.EdgeR.FC[row.names(TKT.EdgeR), 'GRxWT.MC'],
                          ##Color = 0)
                          Color = 'grey',
                          size = 5*log(mean.cpm[row.names(TKT.EdgeR),"GR.M"]),
                          GR.F.cpm = mean.cpm[row.names(TKT.EdgeR),"GR.M"],
                          WT.F.cpm = mean.cpm[row.names(TKT.EdgeR),"WT.M"],
                          GO.terms = "none")

main.title = "Significant DEG in G85R Males vs. WT Males"

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC < 0] = 'lightblue'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC < 0] = 'steelblue'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC < 0] = 'dodgerblue'

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC > 0] = 'lightcoral'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC > 0] = 'tomato'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC > 0] = 'firebrick'

volcano.data$size[volcano.data$FDR <= -log2(.05)] = 3

if(set.size == T){
  volcano.data$size = 15
  volcano.data$size[volcano.data$FDR <= -log2(.05)] = 5
}


if(specific.cat == 'parent.category'){
  parent.genes = GOterm.genes[[intersect(reducedTerms[reducedTerms$parentTerm == parent.category, "go"], names(GOterm.genes))]]
  volcano.data$Color = 'honeydew'
  volcano.data[parent.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$GO[c(grep(parent.category, sig.cats$GO$term), grep(parent.category, sig.cats$GO$category)), 'term']
}


if(specific.cat == 'specific.kegg'){
  kegg.id = sig.cats$KEGG[c(grep(specific.kegg, sig.cats$KEGG$Name), grep(specific.kegg, sig.cats$KEGG$category)), 'category']
  kegg.genes = names(kegg[grep(kegg.id, kegg)])
  volcano.data$Color = 'honeydew'
  row.names(volcano.data) = volcano.data$Symbol
    volcano.data[kegg.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$KEGG[c(grep(specific.kegg, sig.cats$KEGG$Name), grep(specific.kegg, sig.cats$KEGG$category)), 'Name']
}


if(specific.cat == 'specific.go'){
  go.id = sig.cats$GO[c(grep(specific.go, sig.cats$GO$term), grep(specific.go, sig.cats$GO$category)), 'category']
  volcano.data$Color = 'honeydew'
  go.genes = genesingo[[go.id]]
  go.genes = GeneIDKey[GeneIDKey$ensembl %in% go.genes, "FBgn"]
  go.genes = intersect(go.genes, row.names(volcano.data))
  volcano.data[go.genes, 'Color'] = 'deeppink'
  volcano.data = volcano.data[order(volcano.data$Color, decreasing = T),]
  main.title = sig.cats$GO[c(grep(specific.go, sig.cats$GO$term), grep(specific.go, sig.cats$GO$category)), 'term']
}

##volcano.data


fig = plot_ly(data = volcano.data,
              x = ~FC,
              y = ~FDR,
              type = 'scatter',
              mode = 'markers',
              marker = list(color = ~Color, colors = ~Color, size = volcano.data$size,
                            line = list(color = 'black', width = .5)),
              hoverinfo = "text",
              hovertext = paste("Gene:", volcano.data$Symbol,
                      "\n-log2(FDR): ", round(volcano.data$FDR,2),
                      "\nFC: ", round(volcano.data$FC,2),
                      "\nG85R mean cpm: ", round(volcano.data$GR.F.cpm, 1),
                      "\nWT mean cpm: ", round(volcano.data$WT.F.cpm, 1)))
fig = fig %>% layout(title = main.title)

fig
rawdata = read.csv("/Users/johncsantiago/Documents/GitHub/WhartonLab/Metabolomics/MetabolomicsForMetaboanalyst.txt", sep = "\t", row.names = 1)

KEGG.Key = read.csv("/Users/johncsantiago/Documents/GitHub/WhartonLab/Metabolomics/RawMetabolomicData.csv", row.names = 1)
KEGG.Key = setNames(KEGG.Key[-c(1:2), 2], row.names(KEGG.Key)[-c(1:2)])

genotypes = setNames(c("WT", "GR", "TktOEWT", "TktOEGR", "TktDfWT", "TktDfGR"), c("C", "E", "A", "F", "B", "D"))

metadata = data.frame(Sample = colnames(rawdata), 
                      Group = as.character(rawdata[1,]), 
                      TIC = as.numeric(rawdata[2,]), 
                      Genotypes = genotypes[as.character(rawdata[1,])])



norm.func = function(data){
  nd = (as.numeric(data)/metadata$TIC)*1000
  return(nd)
}

norm.data = t(apply(rawdata[3:nrow(rawdata),], 1, norm.func))
colnames(norm.data) = colnames(rawdata)

group.mean=function(data){
  gm = mean(na.omit(data))
}

mean.data = matrix(0, nrow=nrow(norm.data), ncol = length(genotypes))
row.names(mean.data) = row.names(norm.data)
colnames(mean.data) = unique(metadata$Group)

i=1
while(i<=ncol(mean.data)){
  mean.data[,i] = apply(norm.data[,metadata[metadata$Group == colnames(mean.data)[i], "Sample"]], 1, group.mean)
  i = i+1
}

compare.conditions = function(condition1, condition2){
  columns1 = metadata[metadata$Group == condition1, "Sample"]
  columns2 = metadata[metadata$Group == condition2, "Sample"]
  p=setNames(rep(NA, nrow(norm.data)), row.names(norm.data))
  i=1
  while(i<=length(p)){
    if(length(na.omit(norm.data[i,columns1]))>2 & 
       length(na.omit(norm.data[i,columns2]))>2){
      temp = t.test(na.omit(norm.data[i,columns1]),
                    na.omit(norm.data[i,columns2]), paired = F, var.equal = T)
      p[i] = temp[[3]]
      }
    i=i+1
    }

  fdr = p.adjust(p, "BH", length(p))

  fc = mean.data[,condition1]/mean.data[,condition2]

  comparison.table = data.frame(p = p, 
                                FDR = fdr, 
                                FC = fc, 
                                KEGG = KEGG.Key[row.names(mean.data)])
  
  comparison.table = comparison.table[order(comparison.table$FDR),]
}

GRxWT.C = compare.conditions("E", "C")
GRxWT.Df = compare.conditions("D", "B")
GRxWT.OE = compare.conditions("F", "A")
set.size == F
## [1] TRUE
volcano.data = data.frame(Symbol = row.names(GRxWT.C), 
                          FDR = -log2(GRxWT.C$FDR),
                          FC = log2(GRxWT.C$FC),
                          Color = 'honeydew',
                          size = log(mean.data[row.names(GRxWT.C),'E']),
                          GR.Level = mean.data[row.names(GRxWT.C), "E"],
                          WT.Level = mean.data[row.names(GRxWT.C), "C"],
                          GO.terms = "none")

main.title = "FC for metabolites in G85R vs. WT"

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC < 0] = 'lightblue'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC < 0] = 'steelblue'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC < 0] = 'dodgerblue'

volcano.data$Color[volcano.data$FDR >= -log2(.05) & volcano.data$FC > 0] = 'lightcoral'
volcano.data$Color[volcano.data$FDR >= -log2(.0001) & volcano.data$FC > 0] = 'tomato'
volcano.data$Color[volcano.data$FDR >= -log2(.000001) & volcano.data$FC > 0] = 'firebrick'

##volcano.data$size[volcano.data$FDR <= -log2(.05)] = 3

if(set.size == T){
  volcano.data$size = 15
  volcano.data$size[volcano.data$FDR <= -log2(.05)] = 5
}

fig = plot_ly(data = volcano.data,
              x = ~FC,
              y = ~FDR,
              type = 'scatter',
              mode = 'markers',
              marker = list(color = ~Color, colors = ~Color, size = volcano.data$size,
                            line = list(color = 'black', width = .5)),
              hoverinfo = "text",
              hovertext = paste("Metabolite:", volcano.data$Symbol,
                      "\n-log2(FDR): ", round(volcano.data$FDR,2),
                      "\nFC: ", round(volcano.data$FC,2),
                      "\nG85R mean cpm: ", round(volcano.data$GR.Level, 1),
                      "\nWT mean cpm: ", round(volcano.data$WT.Level, 1)))
fig = fig %>% layout(title = main.title)

fig
## Warning: Ignoring 71 observations